Code
knitr::opts_chunk$set(echo = TRUE)To work with rasters and vectors in R, we need some key packages:
sf: Support for simple features, a standardized way to encode spatial vector data.stars, terra, or raster to handle raster data.terra replaces the raster package. The interfaces of terra and raster are similar, but terra is simpler, faster, and can do more.ggspatial: Spatial Data Framework for ggplot2 to map data.tidyverse for data manipulation.knitr::opts_chunk$set(echo = TRUE)IPC_pre_conference_workshop.IPC_pre_conference_workshop.Rproj included in this project folder.
Analysis.qmd and begin following along with these code chunks.# Installs any necessary packages if not already installed
# sometime you need to install Rtools before (https://cran.r-project.org/bin/windows/Rtools/rtools44/rtools.html)
for(
pkg in c(
"srvyr", "survey", "tidyverse", "sf", "terra", "ggspatial", "stars", "stringi", "exactextractr", "haven", "spdep", "geodata", "tmap" #, "rasters", "gtools", "srvyr", "survey", "lme4", "broom.mixed", "broom", "remotes"
)
){
if(!require(pkg, quietly = TRUE, character.only = TRUE)){
install.packages(pkg)
}
}# install.packages("Rtools") # somitime requires to install other packages
# install.packages("srvyr") # For weighting complex data with the tidyverse language
# install.packages("survey") # For working with complex survey data
# install.packages("tidyverse") # A language to manipulate data in R
# install.packages("sf") # Manipulate vector objects
# install.packages("terra") # Manipulate raster objects. Terra is simpler, faster, and can do more
# install.packages("stars") # Manipulate raster objects
# install.packages("ggspatial") # Plot spatial objects with ggplot2
# install.packages("ggpubr") # To combine multiple plots
# install.packages("geodata") # GADM to import shapefiles directly from R
# install.packages("spdep") # For spatial autocorrelation analysis
# install.packages("haven") # To import SPSS, Stata, and SAS datalibrary(srvyr) # For weighting complex data with the tidyverse language
library(survey) # For working with complex survey data
library(tidyverse) # A language to manipulate data in R
library(sf) # Manipulate vector objects
library(terra) # Manipulate raster objects. Terra is simpler, faster, and can do more
library(stars) # Manipulate raster objects
library(ggspatial) # Plot spatial objects with ggplot2
library(ggpubr) # To combine multiple plots
library(geodata) # GADM to import shapefiles directly from R
library(spdep) # For spatial autocorrelation analysis
library(exactextractr) # Fast extraction from raster datasets using polygons
library(haven) # To import SPSS, Stata, and SAS data
library(tmap) # For static and interactive mapsRsenegal19gps <- st_read("data/SNG_gps/SNGE8BFL.shp")
class(senegal19gps$geometry)st_crs(senegal19gps)Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["latitude",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["longitude",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]]
plotggplotThere are many sources where you can download Shapefile data for countries, such as:
sf package to open:senegal0 <- st_read( here::here("data", "shapefile_SEN", "gadm41_SEN_0.shp") )
class(senegal0$geometry)st_crs(senegal0)Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["latitude",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["longitude",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]]
senegal_test <- st_read( here::here("data", "shapefile_SEN_test", "gadm41_SEN_3.shp"))st_crs(senegal_test)Coordinate Reference System:
User input: WGS 84 / UTM zone 28N
wkt:
PROJCRS["WGS 84 / UTM zone 28N",
BASEGEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]],
CONVERSION["UTM zone 28N",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",0,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",-15,
ANGLEUNIT["Degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",0.9996,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",500000,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",0,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["(E)",east,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["(N)",north,
ORDER[2],
LENGTHUNIT["metre",1]],
ID["EPSG",32628]]
senegal_test CRS into senegal0 CRSsenegal_test_project <- st_transform(senegal_test, st_crs(senegal0))
# Alternatively, you can use the EPSG code directly as follows:
# senegal_test_project <- st_transform(senegal_test, crs = 4326)
st_crs(senegal_test_project)Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["latitude",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["longitude",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]]
geodata package#download the shapefile of Senegal
poly.adm0 <- geodata::gadm(country="Senegal", level=0, path=tempdir())
poly.adm1 <- geodata::gadm(country="Senegal", level=1, path=tempdir())
poly.adm2 <- geodata::gadm(country="Senegal", level=2, path=tempdir())
poly.adm3 <- geodata::gadm(country="Senegal", level=3, path=tempdir())
#read it with the Sf package
adm0 <- sf::st_as_sf(poly.adm0)
adm1 <- sf::st_as_sf(poly.adm1)
adm2 <- sf::st_as_sf(poly.adm2)
adm3 <- sf::st_as_sf(poly.adm3)
#str(adm2)We created a Moderately or Severely Wasted indicator: children aged 0-59 months whose weight-for-height z-score is below -2.0 standard deviations (SD) from the median on the WHO Child Growth Standards (i.e., hc72 < -200).
First, we load the demo data.
load(here::here("data", "demodata", "demodata.RData"))demodatawt <- demodata %>% as_survey_design(ids = psu, strata = strata, weights = wt, nest = TRUE)demodata_prev_admin1 <- demodatawt %>%
srvyr::group_by(region) %>%
srvyr::summarize(
waist_prev = survey_mean(waisted)
)demodata_prev_admin1_shp = left_join( adm1%>% mutate(CC_1=as.double(CC_1)), demodata_prev_admin1 %>% mutate(CC_1=as.double(region)), by = join_by(CC_1))
class(demodata_prev_admin1_shp)[1] "sf" "data.frame"
# ggplot() +
# geom_sf(data = demodata_prev_admin1_shp, aes(fill = waist_prev)) +
# scale_fill_gradient2(name= "Waisting prevalence", low = "#4375B7", high = "#D72B22", na.value = "transparent") +
# theme_minimal() # theme_void()
ggplot() +
layer_spatial(demodata_prev_admin1_shp, aes(fill = waist_prev)) +
scale_fill_viridis_c(name= "Waisting prevalence", na.value = NA) +
theme_minimal()Plotting at lower administrative levels (e.g., Admin 2) is more challenging because DHS randomly displaces the clusters’ GPS latitude/longitude positions for confidentiality.
However, the displacement is restricted so that the points stay within the country’s Admin2 area.
Admin2 names or polygons are not included in the GPS data, so we need to spatially join the DHS GPS data with the Admin2 base-map from GADM.
st_join to Get Admin2 Namessenegal19gps_adm2 <- st_join(senegal19gps, adm2)
# we check that it worked well
anti_join(senegal19gps_adm2 |> st_drop_geometry(), adm2, by = "NAME_2") # 0 # To check the gps points that are not placed in any entity from the basemap
anti_join(adm2, senegal19gps_adm2 |> st_drop_geometry(), by = "NAME_2") # 0 # To check the map entities that don't have any gps inside themdemodata_2 <- left_join(demodata, senegal19gps_adm2, by = join_by(cluster_number == DHSCLUST))
anti_join(demodata, senegal19gps, by = join_by(cluster_number == DHSCLUST)) # This should have 0 rows if all rows are matched in the left_joindemodatawt <- demodata_2 %>% as_survey_design(ids = psu, strata = strata, weights = wt, nest = TRUE)demodata_prev_admin2 <- demodatawt %>%
srvyr::group_by(NAME_2) %>%
srvyr::summarize(
waist_prev = survey_mean(waisted)
)demodata_prev_admin2_shp <- left_join(adm2, demodata_prev_admin2, by = join_by(NAME_2 == NAME_2))
# Check that all names match : anti_join(senegal19_basemap2, senegal19_prev_admin2, by = join_by(NAME_2 == NAME_2)) wasting_plot2 = ggplot() +
layer_spatial(demodata_prev_admin2_shp, aes(fill = waist_prev)) +
scale_fill_viridis_c(name= "Waisting prevalence", na.value = NA) +
theme_minimal()
wasting_plot2ggsave(here::here("results", "prev_waist.png"), width = 15, height = 10)# Filter the data for the Dakar region
demodata_prev_admin2_shp_dkr = demodata_prev_admin2_shp[demodata_prev_admin2_shp$NAME_1 == "Dakar", ]
demodata_prev_admin2_shp_dkr = demodata_prev_admin2_shp %>% filter(NAME_1 == "Dakar")
#plot it
ggplot() +
layer_spatial(demodata_prev_admin2_shp_dkr, aes(fill = waist_prev)) +
scale_fill_viridis_c(name= "Waisting prevalence", na.value = NA) +
theme_minimal()or
Precipitation: Climate Hazards Center InfraRed Precipitation (CHIRPS)
temperature: The NASA Goddard Institute for Space Studies temperature analysis dataset (GISTEMP-v4)
Vegetation : Normalized Difference Vegetation Index (NDVI)
Land Cover : Global Land Cover Mapping and Estimation Yearly 30 m V001
Fine particulate: Global and regional PM2.5
Human presence: Global Human Settlement Layer (GHSL)
Population : WorldPop gridded population estimate datasets
The stars, raster, and terra packages allow you to read raster data.
The terra package has a single object class for raster data, SpatRaster.
A SpatRaster represents a spatially referenced surface divided into three-dimensional cells (rows, columns, and layers).
When a SpatRaster is created from a file, it does not load the cell (pixel) values into memory (RAM).
It only reads the parameters that describe the geometry of the SpatRaster, such as the number of rows and columns and the coordinate reference system. The actual values are only read when needed.
We open the file 20200508.tif located in the chirps_tif2 folder with rast command
prec_20200508 <- rast(here::here("data", "chirps_tif2", "20200508.tif"))
#prec_20230508 <- rast("data/chirps_tif2/20200508.tif") # works as well
class(prec_20200508)[1] "SpatRaster"
attr(,"package")
[1] "terra"
Then we check the object created by displaying it
prec_20200508class : SpatRaster
dimensions : 108, 173, 1 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source : 20200508.tif
name : 20200508
This output summaries the .tif file for August 5, 2020 (2020/05/08). Note that there is:
20200508st_crs(adm2)Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]]
st_crs(prec_20200508)Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
ENSEMBLE["World Geodetic System 1984 ensemble",
MEMBER["World Geodetic System 1984 (Transit)"],
MEMBER["World Geodetic System 1984 (G730)"],
MEMBER["World Geodetic System 1984 (G873)"],
MEMBER["World Geodetic System 1984 (G1150)"],
MEMBER["World Geodetic System 1984 (G1674)"],
MEMBER["World Geodetic System 1984 (G1762)"],
MEMBER["World Geodetic System 1984 (G2139)"],
MEMBER["World Geodetic System 1984 (G2296)"],
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]],
ENSEMBLEACCURACY[2.0]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
USAGE[
SCOPE["Horizontal component of 3D system."],
AREA["World."],
BBOX[-90,-180,90,180]],
ID["EPSG",4326]]
Before cropping, it’s always a good idea to check the CRS of both the raster and the spatial object
It will not work if they don’t match.
The command bellow gives an error message
#prec_20230508_crop <- crop(prec_20200508, senegal_test)prec_20200508_crop <- crop(prec_20200508, adm0)
prec_20200508_cropclass : SpatRaster
dimensions : 88, 124, 1 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -17.55, -11.35, 12.3, 16.7 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source(s) : memory
varname : 20200508
name : 20200508
min value : 0.000000
max value : 1.799023
prec_20200508class : SpatRaster
dimensions : 108, 173, 1 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source : 20200508.tif
name : 20200508
terra::writeRaster(prec_20200508_crop, here::here("data", "modified_data", "prec_prec_20200508_crop.tif"), filetype = "GTiff" , overwrite=TRUE)exactextract package provides a fast and accurate algorithm for summarizing values in the portion of a raster dataset that is covered by a polygon,prec_by_districts <- exactextractr::exact_extract(
prec_20200508,
adm2,
fun = "mean"
) %>%
as.data.frame() %>%
dplyr::rename_with(
~ifelse(
stringr::str_detect(.x, "\\."),
paste0("chirps")
)
) #%>%
# bind_cols(senegal, .)summary(prec_by_districts) chirps
Min. :0.000000
1st Qu.:0.000000
Median :0.000000
Mean :0.001005
3rd Qu.:0.000000
Max. :0.044226
You can stack multiple raster layers of the same spatial resolution and extent to create a RasterStack using raster::stack() or RasterBrick using raster::brick().
Bur rast of terra package is more powerful
Often times, processing a multi-layer object has computational advantages over processing multiple single-layer one by on
# the list of path to the files
files_list <- c("data/chirps_tif2/20200131.tif", "data/chirps_tif2/20200201.tif")
#read the two at the same time
multi_layerraster<- rast(files_list)
multi_layerrasterclass : SpatRaster
dimensions : 108, 173, 2 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
sources : 20200131.tif
20200201.tif
names : 20200131, 20200201
#first import all files in a single folder as a list
rastlist <- list.files(path = "data/chirps_tif2", pattern='.tif$', all.files= T, full.names= T)
#--- read the all at the same time ---#
allprec <- terra::rast(rastlist)
allprecclass : SpatRaster
dimensions : 108, 173, 2192 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
sources : 20150101.tif
20150102.tif
20150103.tif
... and 2189 more sources
names : 20150101, 20150102, 20150103, 20150104, 20150105, 20150106, ...
# first we create a year list
years <- map(2015:2020, ~{
list.files("data/chirps_tif2/", pattern = paste0("^", .x), full.names = TRUE)
})
# rename the list
years <- set_names(years, 2015:2020)
#create a multi-layer raster per year and store all years in one large list
years <- years %>% map(~.x %>% rast)years$`2019`class : SpatRaster
dimensions : 108, 173, 365 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
sources : 20190101.tif
20190102.tif
20190103.tif
... and 362 more sources
names : 20190101, 20190102, 20190103, 20190104, 20190105, 20190106, ...
# for a sigle year
years$`2019` %>% sum()class : SpatRaster
dimensions : 108, 173, 1 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source(s) : memory
name : sum
min value : 60.60399
max value : 2173.61415
#More efficiently, we’ll apply the same sum function to every year in our list
chirps_yearly_sum <- map(years, ~.x %>% sum)chirps_yearly_sum <- rast(chirps_yearly_sum)
chirps_yearly_sumclass : SpatRaster
dimensions : 108, 173, 6 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source(s) : memory
names : 2015, 2016, 2017, 2018, 2019, 2020
min values : 78.81139, 110.7043, 65.90624, 96.83039, 60.60399, 102.1226
max values : 2611.19193, 1902.9028, 1901.80054, 2018.84273, 2173.61415, 2656.6543
For every pixel (0.05 degrees lat by 0.05 degree lon), we now have the total rainfall accumulation for every year 2015-2020.
First we calculate the average yearly rainfall accumulation for each pixel (across all years):
chirps_avg <- mean(chirps_yearly_sum)The standard deviation from that average:
chirps_sd <- stdev(chirps_yearly_sum)Finally we can use both to compute a Z-score for each pixel in each year.
chirps_z <- (chirps_yearly_sum - chirps_avg) / chirps_sdclass : SpatRaster
dimensions : 108, 173, 6 (nrow, ncol, nlyr)
resolution : 0.05, 0.05 (x, y)
extent : -19.1, -10.45, 12.05, 17.45 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (EPSG:4326)
source(s) : memory
names : 2015, 2016, 2017, 2018, 2019, 2020
min values : -0.9935492, -1.511942, -1.718446, -1.563885, -2.087076, -1.319390
max values : 2.2231631, 1.690862, 1.262613, 1.024049, 1.660715, 2.188421
We extract mean values by districts, rename columns and merge with senegal borders shapefile
tot_yearly_prec_by_dep <- exactextractr::exact_extract(
chirps_yearly_sum,
adm2,
fun = "mean"
) %>%
dplyr::rename_with(
~ifelse(
stringr::str_detect(.x, "\\."),
paste0("chirps", .)
)
) %>%
bind_cols(adm2, .)
#summary(chirps_yearly_sum)
class(tot_yearly_prec_by_dep)ggplot() +
#geom_sf(demodata_prev_admin2_shp) +
layer_spatial(mask(chirps_yearly_sum$`2019`, vect(adm2), touches = FALSE)) +
scale_fill_gradient2(low = "#D72B22", high = "#4375B7", na.value = "transparent") +
labs(fill = "Total precipitation (2019)") +
geom_sf(data = st_centroid(demodata_prev_admin2_shp), #get centroids
aes(size = waist_prev), name="Waisting prevalence") + # variable for size
layer_spatial(adm2, fill = NA) +
theme_minimal() ggplot() +
layer_spatial(demodata_prev_admin2_shp, aes(fill = waist_prev)) +
scale_fill_viridis_c(name= "Waisting prevalence", na.value = NA) +
geom_sf(data = st_centroid(tot_yearly_prec_by_dep), #get centroids
aes(size = `chirpsmean.2019`)) + # variable for size
labs(fill = "Total precipitation (2019)") +
theme_minimal() tmaptmap_mode("view")
sengal <-tm_shape(demodata_prev_admin2_shp) +
tm_borders() +
tm_polygons(col ="waist_prev", palette="Greens", values.range=.9, id="NAME_2", title="Waisting prevalence") +
# tm_compass() + tm_scale_bar() + tm_layout(legend.outside = TRUE) +
#tmap_options(check.and.fix = TRUE) +
tm_scale_bar(position = c("left", "bottom"))
sengalArlette Simo Fotso (Researcher, INED) : arlette.simo-fotso@ined.fr 
spdep packagenb <- poly2nb(demodata_prev_admin2_shp, queen=TRUE)lw <- nb2listw(nb, style="W", zero.policy=FALSE) #Setting zero.policy to FALSE will return an error if at least one polygon has no neighbormoran(demodata_prev_admin2_shp$waist_prev, listw = lw, n = length(nb), S0 = Szero(lw))$I
[1] 0.4280556
$K
[1] 3.877164
MC<- moran.mc(demodata_prev_admin2_shp$waist_prev, lw, nsim = 999)
MC$p.value[1] 0.001
It is also possible to compute Local Moran’s I, but you need large data
More here